{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "执行操作： abs_energy/这一段总的样本数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import joblib\n",
    "from joblib import Parallel,delayed\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "APPLY_COLS = ['abs_energy', 'absolute_sum_of_changes', 'longest_strike_below_mean', \n",
    "'longest_strike_above_mean', 'count_above_mean', 'count_below_mean', \n",
    "'sum_of_reoccurring_values', 'sum_of_reoccurring_data_points']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(110027, 37)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat = joblib.load('./concats/concat_01.lz4')\n",
    "concat.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['RULR', 'csv_no', 'CL', 'CLI', 'spindle_load', 'Current_1__abs_energy',\n",
       "       'Current_1__kurtosis', 'Current_1__length', 'Current_1__mean',\n",
       "       'Current_1__mean_abs_change', 'Current_1__mean_change',\n",
       "       'Current_1__skewness', 'Current_1__variance', 'Vibration_1__abs_energy',\n",
       "       'Vibration_1__kurtosis', 'Vibration_1__length', 'Vibration_1__mean',\n",
       "       'Vibration_1__mean_abs_change', 'Vibration_1__mean_change',\n",
       "       'Vibration_1__skewness', 'Vibration_1__variance',\n",
       "       'Vibration_2__abs_energy', 'Vibration_2__kurtosis',\n",
       "       'Vibration_2__length', 'Vibration_2__mean',\n",
       "       'Vibration_2__mean_abs_change', 'Vibration_2__mean_change',\n",
       "       'Vibration_2__skewness', 'Vibration_2__variance',\n",
       "       'Vibration_3__abs_energy', 'Vibration_3__kurtosis',\n",
       "       'Vibration_3__length', 'Vibration_3__mean',\n",
       "       'Vibration_3__mean_abs_change', 'Vibration_3__mean_change',\n",
       "       'Vibration_3__skewness', 'Vibration_3__variance'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scale_tsfeature(data_no, apply_cols=None):\n",
    "    if apply_cols==None:\n",
    "        apply_cols = []\n",
    "    # scale abs_energy\n",
    "    concat = joblib.load('./concats/concat_0%d.lz4'%data_no)\n",
    "    # scale\n",
    "    length_arr = concat['Current_1__length'].values\n",
    "    \n",
    "    def func(filter_col, apply_cols):\n",
    "        for col in apply_cols:\n",
    "            if col in filter_col:\n",
    "                return True\n",
    "        return False\n",
    "    \n",
    "    scale_cols = [col for col in concat.columns if func(col, apply_cols)]\n",
    "    for col in scale_cols:\n",
    "        concat[col+'_scale'] = concat[col]/length_arr\n",
    "    # drop useless cols\n",
    "    drop_cols = scale_cols + [col for col in concat.columns if 'length' in col]\n",
    "    return concat\n",
    "\n",
    "# def calc_moving_speed(df):\n",
    "#     diff_df = df[['x','y','z','distance_xy','distance_xz','distance_yz','distance_xyz', 'CL']].copy().diff()\n",
    "#     speed_arr = diff_df[['x','y','z','distance_xy','distance_xz','distance_yz','distance_xyz']].values / diff_df[['CL']].values\n",
    "#     speed_cols = [col+'__speed' for col in ['x','y','z','distance_xy','distance_xz','distance_yz','distance_xyz']]\n",
    "#     speed_df = pd.DataFrame(data=speed_arr, columns=speed_cols)\n",
    "#     result = pd.concat([df, speed_df], axis=1)\n",
    "    \n",
    "#     #　剔除nans和infs\n",
    "#     cond = np.isfinite(result).all(axis=1)\n",
    "#     print('nans和infs的数量：', result.shape[0]-cond.sum())\n",
    "#     result = result[cond]\n",
    "    \n",
    "#     #　剔除重复值\n",
    "#     result.drop_duplicates(subset='CL', inplace=True)\n",
    "    \n",
    "#     result.reset_index(drop=True, inplace=True)\n",
    "#     return result\n",
    "\n",
    "def write_files(data_no):\n",
    "    df_1 = scale_tsfeature(data_no, APPLY_COLS)\n",
    "#     df_2 = calc_moving_speed(df_1)\n",
    "    df_2 =df_1\n",
    "    if not os.path.exists('./concats_more'):\n",
    "        os.mkdir('./concats_more')\n",
    "    joblib.dump(df_2, './concats_more/concat_0%d.lz4'%data_no, compress='lz4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_files(1)\n",
    "write_files(2)\n",
    "write_files(3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
